{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import random\n",
    "import jsonlines\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['id', 'NAME', 'host id', 'host_identity_verified', 'host name',\n",
      "       'neighbourhood group', 'neighbourhood', 'lat', 'long', 'country',\n",
      "       'country code', 'instant_bookable', 'cancellation_policy', 'room type',\n",
      "       'Construction year', 'price', 'service fee', 'minimum nights',\n",
      "       'number of reviews', 'last review', 'reviews per month',\n",
      "       'review rate number', 'calculated host listings count',\n",
      "       'availability 365', 'house_rules', 'license'],\n",
      "      dtype='object')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_27037/2741766514.py:2: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  data = pd.read_csv(file_path)\n"
     ]
    }
   ],
   "source": [
    "file_path = \"/<YOUR_OWN_PATH>/ToolQA/data/raw_data/airbnb/Airbnb_Open_Data.csv\"\n",
    "data = pd.read_csv(file_path)\n",
    "print(data.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "286.0\n"
     ]
    }
   ],
   "source": [
    "print(data.iloc[0]['availability 365'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_questions_per_template = 10\n",
    "question_id = 0\n",
    "questions = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0000', 'question': \"What is the host's name for Amazing One Bedroom Apartment in Prime Brooklyn in Bushwick?\", 'answer': 'Alan'}, {'qid': 'easy-airbnb-0001', 'question': \"What is the host's name for Bright, Quiet Astoria 1 Bedroom near trains, MoMi in Astoria?\", 'answer': 'Collin'}, {'qid': 'easy-airbnb-0002', 'question': \"What is the host's name for ★Designer 3BR Apt★ In Times Square in Hell's Kitchen?\", 'answer': 'Vivian'}, {'qid': 'easy-airbnb-0003', 'question': \"What is the host's name for Spacious 3BR Apt - 12 Min to Manhattan in Long Island City?\", 'answer': 'Timo'}, {'qid': 'easy-airbnb-0004', 'question': \"What is the host's name for Charming brownstone apartment in Columbia St?\", 'answer': 'Chapman'}, {'qid': 'easy-airbnb-0005', 'question': \"What is the host's name for Large Room in Trendy Bushwick - 4 in Bushwick?\", 'answer': 'Anthony'}, {'qid': 'easy-airbnb-0006', 'question': \"What is the host's name for Sanabria’s house in Bushwick?\", 'answer': 'Edwin'}, {'qid': 'easy-airbnb-0007', 'question': \"What is the host's name for Luxury Wellness Space - Breathtaking NYC Views in Long Island City?\", 'answer': 'Sara'}, {'qid': 'easy-airbnb-0008', 'question': \"What is the host's name for Brooklyn's heart, Ft Greene! in Fort Greene?\", 'answer': 'Andrew'}, {'qid': 'easy-airbnb-0009', 'question': \"What is the host's name for 1 BEDROOM ASTORIA APARTMENT / 15 min to Manhattan in Astoria?\", 'answer': 'Kijin'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the host's name for {} in {}?\".format(Name, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = row[\"host name\"]\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0010', 'question': 'How many days are Cozy 2 bedroom 5min LGA/15min JFK  on main floor (id: 32113500) available during a year (365 days)?', 'answer': '347'}, {'qid': 'easy-airbnb-0011', 'question': 'How many days are Cozy Brooklyn Room (id: 32595105) available during a year (365 days)?', 'answer': '99'}, {'qid': 'easy-airbnb-0012', 'question': 'How many days are Private bathroom and bedroom for 1 -2 people (id: 14295197) available during a year (365 days)?', 'answer': '0'}, {'qid': 'easy-airbnb-0013', 'question': 'How many days are Cute Studio in Bushwick BK, NYC (id: 3286753) available during a year (365 days)?', 'answer': '10'}, {'qid': 'easy-airbnb-0014', 'question': 'How many days are 20 mins to Times Square!  Vibrant NYC neighborhood (id: 52276316) available during a year (365 days)?', 'answer': '107'}, {'qid': 'easy-airbnb-0015', 'question': 'How many days are Large BR in Spacious Artist Loft (id: 47523222) available during a year (365 days)?', 'answer': '135'}, {'qid': 'easy-airbnb-0016', 'question': 'How many days are Brooklyn Garden Apartment (id: 15163413) available during a year (365 days)?', 'answer': '51'}, {'qid': 'easy-airbnb-0017', 'question': 'How many days are MINIMALISTIC APARTMENT/DECK IN HISTORIC BROWNSTONE (id: 16455242) available during a year (365 days)?', 'answer': '343'}, {'qid': 'easy-airbnb-0018', 'question': 'How many days are Location! Location! Location! ... in a cute studio (id: 42878931) available during a year (365 days)?', 'answer': '0'}, {'qid': 'easy-airbnb-0019', 'question': 'How many days are Elegant Large One Bed  / 1.5 Bathroom-  UWS (id: 48875805) available during a year (365 days)?', 'answer': '325'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        question = \"How many days are {} (id: {}) available during a year (365 days)?\".format(Name, id)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(int(row[\"availability 365\"]))\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0020', 'question': 'What is the room type of Spacious Room in Great Neighborhood (id: 8266290) in Clinton Hill?', 'answer': 'Private room'}, {'qid': 'easy-airbnb-0021', 'question': 'What is the room type of Charming East Village Apartment (id: 1867895) in East Village?', 'answer': 'Entire home/apt'}, {'qid': 'easy-airbnb-0022', 'question': 'What is the room type of Cute, spacious room in Crown Heights (id: 13713073) in Crown Heights?', 'answer': 'Private room'}, {'qid': 'easy-airbnb-0023', 'question': 'What is the room type of Cozy Apartment - Try NYC Living! (id: 52815913) in Long Island City?', 'answer': 'Entire home/apt'}, {'qid': 'easy-airbnb-0024', 'question': 'What is the room type of MODERN APARTMENT MINUTES AWAY FROM MANHATTAN (id: 27014666) in Kingsbridge?', 'answer': 'Entire home/apt'}, {'qid': 'easy-airbnb-0025', 'question': 'What is the room type of Private room, modern apartment w/ PRIVATE ROOF! (id: 16693836) in Williamsburg?', 'answer': 'Private room'}, {'qid': 'easy-airbnb-0026', 'question': 'What is the room type of Room for rent - Right by Bedford ave L train (id: 28307600) in Williamsburg?', 'answer': 'Private room'}, {'qid': 'easy-airbnb-0027', 'question': 'What is the room type of Industrial Modernism Flex 2br Loft! (id: 2952060) in Greenpoint?', 'answer': 'Entire home/apt'}, {'qid': 'easy-airbnb-0028', 'question': 'What is the room type of GARDEN UTOPIA, UES; SUPERIOR LOCATION & AMENITIES (id: 18737898) in Upper East Side?', 'answer': 'Entire home/apt'}, {'qid': 'easy-airbnb-0029', 'question': 'What is the room type of Large private bedroom&bath 2 min. away from subway (id: 42892739) in Williamsburg?', 'answer': 'Private room'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the room type of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = row[\"room type\"]\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0030', 'question': 'What is the price of Spacious 2 BR Bedford Stuyvesant Bklyn NY (id: 44668936) in Bedford-Stuyvesant?', 'answer': '$479 '}, {'qid': 'easy-airbnb-0031', 'question': 'What is the price of Centrally  Located Beautiful Escape (id: 42418313) in East Flatbush?', 'answer': '$828 '}, {'qid': 'easy-airbnb-0032', 'question': \"What is the price of Perfect 1 Bedroom in Manhattan,close to everything (id: 33393179) in Hell's Kitchen?\", 'answer': '$572 '}, {'qid': 'easy-airbnb-0033', 'question': 'What is the price of 4Js Room4You (id: 54980377) in East Flatbush?', 'answer': '$348 '}, {'qid': 'easy-airbnb-0034', 'question': 'What is the price of Spacious 5 Bedroom Apt Williamsburg (id: 47825330) in Williamsburg?', 'answer': '$87 '}, {'qid': 'easy-airbnb-0035', 'question': 'What is the price of Tranquil Columbus Cir. 1BR, Doorman, River Views by Blueground (id: 22381421) in Upper West Side?', 'answer': '$1,146 '}, {'qid': 'easy-airbnb-0036', 'question': 'What is the price of Your Quiet Refuge in the Heart of Manhattan (id: 45850305) in Civic Center?', 'answer': '$1,150 '}, {'qid': 'easy-airbnb-0037', 'question': 'What is the price of Beautiful, spacious 2 bed, Crown Heights apartment (id: 54098906) in Brownsville?', 'answer': '$629 '}, {'qid': 'easy-airbnb-0038', 'question': 'What is the price of Brooklyn oasis with breathtaking skyline view! (id: 40452125) in Bedford-Stuyvesant?', 'answer': '$799 '}, {'qid': 'easy-airbnb-0039', 'question': 'What is the price of Sun drenched loft like space with private yard (id: 18732928) in Williamsburg?', 'answer': '$632 '}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the price of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = row[\"price\"]\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0040', 'question': 'What is the minimum number of nights of Entire APT in Center of Park Slope Brooklyn (id: 11466316) in South Slope?', 'answer': '6'}, {'qid': 'easy-airbnb-0041', 'question': \"What is the minimum number of nights of #Awesome 2 BR Flat Times Square! (id: 18268996) in Hell's Kitchen?\", 'answer': '2'}, {'qid': 'easy-airbnb-0042', 'question': 'What is the minimum number of nights of Beautiful douplex with garden in crown heights!! (id: 45622205) in Crown Heights?', 'answer': '3'}, {'qid': 'easy-airbnb-0043', 'question': 'What is the minimum number of nights of TRUE2BR-PRIME MIDTOWN EAST~53rd&3rd (id: 7404150) in Midtown?', 'answer': '30'}, {'qid': 'easy-airbnb-0044', 'question': \"What is the minimum number of nights of Luxurious Manhattan 1 Bedroom (id: 13026564) in Hell's Kitchen?\", 'answer': '30'}, {'qid': 'easy-airbnb-0045', 'question': 'What is the minimum number of nights of Charming Boutique Hotel in Chelsea New York (id: 30363813) in Chelsea?', 'answer': '1'}, {'qid': 'easy-airbnb-0046', 'question': \"What is the minimum number of nights of Cozy, Quiet Apt near Times Square and Penn Station (id: 24179158) in Hell's Kitchen?\", 'answer': '10'}, {'qid': 'easy-airbnb-0047', 'question': 'What is the minimum number of nights of Vanderbilt  · Quaint Art-Filled Brooklyn Apartment (id: 46606956) in Clinton Hill?', 'answer': '3'}, {'qid': 'easy-airbnb-0048', 'question': 'What is the minimum number of nights of 1br Apartment EastVillage,Manhattan (id: 10875908) in East Village?', 'answer': '1'}, {'qid': 'easy-airbnb-0049', 'question': 'What is the minimum number of nights of Bushwick Duplex 3 min walk to subway (id: 19458650) in Bushwick?', 'answer': '1'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the minimum number of nights of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(int(row[\"minimum nights\"]))\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0050', 'question': 'When was of Large Room in Queens (id: 44353020) in Rego Park constructed?', 'answer': '2003'}, {'qid': 'easy-airbnb-0051', 'question': 'When was of Bright Apartment in Clinton Hill (id: 9290254) in Clinton Hill constructed?', 'answer': '2012'}, {'qid': 'easy-airbnb-0052', 'question': 'When was of Huge Brownstone,Private Garden- Pet chicks & eggs. (id: 50048890) in Bedford-Stuyvesant constructed?', 'answer': '2010'}, {'qid': 'easy-airbnb-0053', 'question': 'When was of The Orchid (id: 37122308) in Flatbush constructed?', 'answer': '2013'}, {'qid': 'easy-airbnb-0054', 'question': 'When was of Stunning arty 3200sf 3FLR+3BR townhome w/terrace (id: 1228884) in Park Slope constructed?', 'answer': '2016'}, {'qid': 'easy-airbnb-0055', 'question': 'When was of Bright 2-bedroom in Brooklyn (id: 22728818) in Bedford-Stuyvesant constructed?', 'answer': '2022'}, {'qid': 'easy-airbnb-0056', 'question': 'When was of Charming and Convinient, LOCATION LOCATION! (id: 39064747) in Harlem constructed?', 'answer': '2021'}, {'qid': 'easy-airbnb-0057', 'question': 'When was of Charming Carnegie Hill Bedroom (id: 20204255) in East Harlem constructed?', 'answer': '2013'}, {'qid': 'easy-airbnb-0058', 'question': 'When was of Beautiful 1 bedroom apartment in Greenpoint, BK (id: 46392112) in Greenpoint constructed?', 'answer': '2011'}, {'qid': 'easy-airbnb-0059', 'question': 'When was of big cozy room in Bed-Stuy with separate entrance (id: 50473056) in Bedford-Stuyvesant constructed?', 'answer': '2009'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"When was of {} (id: {}) in {} constructed?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(int(row[\"Construction year\"]))\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0060', 'question': 'How many reviews does Modern Manhattan Living Suite 2A (id: 3654033) in Harlem have?', 'answer': '36'}, {'qid': 'easy-airbnb-0061', 'question': 'How many reviews does Private Room in WILLIAMSBURG (id: 3211088) in Williamsburg have?', 'answer': '38'}, {'qid': 'easy-airbnb-0062', 'question': 'How many reviews does MASSIVE 8BR/8BTH Brooklyn Townhouse w/ Backyard (id: 30796264) in Bushwick have?', 'answer': '5'}, {'qid': 'easy-airbnb-0063', 'question': 'How many reviews does A+ Location City Studio (Queen Bed & Futon) (id: 31064682) in Midtown have?', 'answer': '36'}, {'qid': 'easy-airbnb-0064', 'question': 'How many reviews does Apartment with backyard in East Williamsburg (id: 43488118) in Williamsburg have?', 'answer': '91'}, {'qid': 'easy-airbnb-0065', 'question': 'How many reviews does PRIVATE BIG ROOM w. Queen Bed, and quiet roommates (id: 42031151) in Melrose have?', 'answer': '2'}, {'qid': 'easy-airbnb-0066', 'question': 'How many reviews does 1BR Loft: Cleaning CDC guidelines implemented (id: 36070177) in Greenpoint have?', 'answer': '5'}, {'qid': 'easy-airbnb-0067', 'question': 'How many reviews does Cute & Cozy Studio in the heart of East Village (id: 23347946) in East Village have?', 'answer': '6'}, {'qid': 'easy-airbnb-0068', 'question': 'How many reviews does Cozy Brownstone Apartment Near City (id: 56740557) in Bedford-Stuyvesant have?', 'answer': '147'}, {'qid': 'easy-airbnb-0069', 'question': 'How many reviews does ✨Bright, spacious apartment in the West Village❤️ (id: 12394733) in West Village have?', 'answer': '86'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"How many reviews does {} (id: {}) in {} have?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(int(row[\"number of reviews\"]))\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0070', 'question': \"What is the last review date of Hudson Yards 1 Bedroom (LGBT friendly) (id: 27031235) in Hell's Kitchen?\", 'answer': nan}, {'qid': 'easy-airbnb-0071', 'question': 'What is the last review date of Large 1 bdrm with Modern finishes - Pelham Gardens (id: 17531123) in Pelham Gardens?', 'answer': '7/4/2019'}, {'qid': 'easy-airbnb-0072', 'question': 'What is the last review date of LUXURY MURRAY HILL EAST 34TH~1BR (id: 15549470) in Murray Hill?', 'answer': '6/23/2019'}, {'qid': 'easy-airbnb-0073', 'question': 'What is the last review date of Modern 2BR Apartment in Heart of East Village (id: 4618349) in East Village?', 'answer': '1/1/2017'}, {'qid': 'easy-airbnb-0074', 'question': 'What is the last review date of Brand New Cozy Apt Upper West Manhattan (id: 20436773) in Upper West Side?', 'answer': '6/24/2019'}, {'qid': 'easy-airbnb-0075', 'question': 'What is the last review date of Bright, Clean + Beautiful West Village 1 Bedroom (id: 15396483) in West Village?', 'answer': '3/11/2019'}, {'qid': 'easy-airbnb-0076', 'question': 'What is the last review date of Clean, Safe, Convenient, Comfy (id: 3494971) in Harlem?', 'answer': '9/29/2016'}, {'qid': 'easy-airbnb-0077', 'question': 'What is the last review date of One bedroom available in Astoria (id: 6316671) in Long Island City?', 'answer': nan}, {'qid': 'easy-airbnb-0078', 'question': 'What is the last review date of Home For Medical Professionals - \"Laudanum\" (id: 29397288) in Bushwick?', 'answer': '2/26/2022'}, {'qid': 'easy-airbnb-0079', 'question': 'What is the last review date of PRIVATE GARDEN APARTMENT IN HISTORIC BROWNSTONE (id: 37738123) in Bedford-Stuyvesant?', 'answer': '11/7/2020'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the last review date of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = row[\"last review\"]\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0080', 'question': 'What is the review rate number of Cozy Manhattan view one bedroom . (id: 36526377) in Tompkinsville?', 'answer': '4.0'}, {'qid': 'easy-airbnb-0081', 'question': 'What is the review rate number of 138 Bowery-Bright Studio (id: 34458013) in Chinatown?', 'answer': '3.0'}, {'qid': 'easy-airbnb-0082', 'question': 'What is the review rate number of CENTRAL GORGEOUS STUDIO - HUGE PRIVATE TERRACE (id: 43367164) in East Village?', 'answer': '2.0'}, {'qid': 'easy-airbnb-0083', 'question': 'What is the review rate number of Trendy Chelsea 1BR w/ Balcony, Gym + Doorman by Blueground (id: 22336685) in Chelsea?', 'answer': '2.0'}, {'qid': 'easy-airbnb-0084', 'question': 'What is the review rate number of Beautiful, New, 1 BR, Heart of Williamsburg! (id: 10019843) in Williamsburg?', 'answer': '4.0'}, {'qid': 'easy-airbnb-0085', 'question': 'What is the review rate number of 2 Bedroom the NYs Most Poppin Neighborhood (id: 25770886) in Lower East Side?', 'answer': '2.0'}, {'qid': 'easy-airbnb-0086', 'question': 'What is the review rate number of Room in cool Bushwick loft! (id: 2130237) in Bedford-Stuyvesant?', 'answer': '1.0'}, {'qid': 'easy-airbnb-0087', 'question': 'What is the review rate number of Amazing View from Contemporary Loft (id: 39977147) in Midtown?', 'answer': '3.0'}, {'qid': 'easy-airbnb-0088', 'question': 'What is the review rate number of Charming Apartment in Brooklyn (id: 43284872) in Bedford-Stuyvesant?', 'answer': '3.0'}, {'qid': 'easy-airbnb-0089', 'question': 'What is the review rate number of Big Room in a Downtown Loft Apartment (id: 37089723) in Financial District?', 'answer': '2.0'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the review rate number of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(row[\"review rate number\"])\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'easy-airbnb-0090', 'question': 'What is the average number of reviews per month of Hotel Room King Bed TSQ (id: 37421103) in Midtown?', 'answer': '0.08'}, {'qid': 'easy-airbnb-0091', 'question': 'What is the average number of reviews per month of Big, Bright, Beautiful Room (id: 8857251) in Crown Heights?', 'answer': '0.07'}, {'qid': 'easy-airbnb-0092', 'question': 'What is the average number of reviews per month of Charming 2brs w/ private garden in Carroll Gardens (id: 13763884) in Gowanus?', 'answer': '0.19'}, {'qid': 'easy-airbnb-0093', 'question': 'What is the average number of reviews per month of Cozy home (id: 53206389) in Midwood?', 'answer': '1.88'}, {'qid': 'easy-airbnb-0094', 'question': 'What is the average number of reviews per month of Steps from the Sand Studio (id: 28516922) in Belle Harbor?', 'answer': '2.25'}, {'qid': 'easy-airbnb-0095', 'question': 'What is the average number of reviews per month of 1 Bedroom w/ common space in 4br Loft in Bushwick (id: 56440658) in Bedford-Stuyvesant?', 'answer': 'nan'}, {'qid': 'easy-airbnb-0096', 'question': 'What is the average number of reviews per month of Modern Private Bedroom-15min from the Manhattan! (id: 45618339) in Astoria?', 'answer': 'nan'}, {'qid': 'easy-airbnb-0097', 'question': \"What is the average number of reviews per month of Brooklyn's finest by Prospect Park (id: 40505698) in Prospect-Lefferts Gardens?\", 'answer': '1.37'}, {'qid': 'easy-airbnb-0098', 'question': 'What is the average number of reviews per month of One bedroom apartment in Brooklyn, NY (id: 22332819) in Park Slope?', 'answer': '0.81'}, {'qid': 'easy-airbnb-0099', 'question': 'What is the average number of reviews per month of Relax comfortably 1BR w/AC in the Heart of Queens1 (id: 27199687) in East Elmhurst?', 'answer': '8.0'}]\n"
     ]
    }
   ],
   "source": [
    "for i in range(num_questions_per_template):\n",
    "    answer = \"\"\n",
    "    while answer == \"\":\n",
    "        random_index = random.randint(0, len(data)-1)\n",
    "        row = data.iloc[random_index]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        question = \"What is the average number of reviews per month of {} (id: {}) in {}?\".format(Name, id, neighbourhood)\n",
    "        sub_table = data.loc[(data[\"NAME\"] == Name) & (data[\"id\"] == id) & (data[\"neighbourhood\"] == neighbourhood)]\n",
    "        if len(sub_table) == 1:\n",
    "            answer = str(row[\"reviews per month\"])\n",
    "        else:\n",
    "            answer = \"\"\n",
    "    questions.append({\"qid\": \"easy-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "    question_id += 1\n",
    "print(questions[-10:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/questions/easy/airbnb-easy.jsonl', mode='w') as writer:\n",
    "    for row in questions:\n",
    "        writer.write(row)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
